Jan-Philipp Kolb
9 Mai 2017
.xlsx, .csv, .dta oder Ähnliches abgespeichert, sondern in einem der folgenden Formate: .json, .xml etc. Die Struktur der Daten kann man sich mit einem JSON Viewer anschauen.
# Slide: "jsonlite"
# Install the package once, then attach it for the session.
install.packages("jsonlite")
library(jsonlite)
citation("jsonlite")##
## To cite jsonlite in publications use:
##
## Jeroen Ooms (2014). The jsonlite Package: A Practical and
## Consistent Mapping Between JSON Data and R Objects.
## arXiv:1403.2805 [stat.CO] URL http://arxiv.org/abs/1403.2805.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {The jsonlite Package: A Practical and Consistent Mapping Between JSON Data and R Objects},
## author = {Jeroen Ooms},
## journal = {arXiv:1403.2805 [stat.CO]},
## year = {2014},
## url = {http://arxiv.org/abs/1403.2805},
## }
library("jsonlite")
# Read the local GeoJSON file into an R list and inspect its top-level names.
DRINKWATER <- fromJSON("data/RomDrinkingWater.geojson")
names(DRINKWATER)[1:3]
## [1] "type" "generator" "copyright"
names(DRINKWATER)[4:5]## [1] "timestamp" "features"
head(DRINKWATER$features)## type id properties.@id properties.amenity properties.flow
## 1 Feature node/246574149 node/246574149 drinking_water push-button
## 2 Feature node/246574150 node/246574150 drinking_water <NA>
## 3 Feature node/246574151 node/246574151 drinking_water <NA>
## 4 Feature node/248743324 node/248743324 drinking_water <NA>
## 5 Feature node/251773348 node/251773348 drinking_water <NA>
## 6 Feature node/251773551 node/251773551 drinking_water <NA>
## properties.type properties.name properties.name:fr properties.wheelchair
## 1 nasone <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 nasone <NA> <NA> <NA>
## 6 <NA> Acqua Marcia Eau potable yes
## properties.created_by properties.indoor geometry.type
## 1 <NA> <NA> Point
## 2 <NA> <NA> Point
## 3 <NA> <NA> Point
## 4 <NA> <NA> Point
## 5 <NA> <NA> Point
## 6 <NA> <NA> Point
## geometry.coordinates
## 1 12.49191, 41.89479
## 2 12.49095, 41.89489
## 3 12.48774, 41.89450
## 4 12.48773, 41.89354
## 5 12.48529, 41.88539
## 6 12.48386, 41.89332
# Fetch the repository list from the GitHub API and show the first rows.
my_repos <- fromJSON("https://api.github.com/users/japhilko/repos")
head(my_repos)
## id name full_name
## 1 29143362 2015-01-15-EMBLHeidelberg Japhilko/2015-01-15-EMBLHeidelberg
## 2 39427013 DataAnalysis Japhilko/DataAnalysis
## 3 26485588 DataGeneration Japhilko/DataGeneration
## 4 26164276 DLR_IntroR Japhilko/DLR_IntroR
## 5 20760765 GeoData Japhilko/GeoData
## 6 55756271 geosmdata Japhilko/geosmdata
## owner.login owner.id
## 1 Japhilko 7593396
## 2 Japhilko 7593396
## 3 Japhilko 7593396
## 4 Japhilko 7593396
## 5 Japhilko 7593396
## 6 Japhilko 7593396
## owner.avatar_url owner.gravatar_id
## 1 https://avatars2.githubusercontent.com/u/7593396?v=3
## 2 https://avatars2.githubusercontent.com/u/7593396?v=3
## 3 https://avatars2.githubusercontent.com/u/7593396?v=3
## 4 https://avatars2.githubusercontent.com/u/7593396?v=3
## 5 https://avatars2.githubusercontent.com/u/7593396?v=3
## 6 https://avatars2.githubusercontent.com/u/7593396?v=3
## owner.url owner.html_url
## 1 https://api.github.com/users/Japhilko https://github.com/Japhilko
## 2 https://api.github.com/users/Japhilko https://github.com/Japhilko
## 3 https://api.github.com/users/Japhilko https://github.com/Japhilko
## 4 https://api.github.com/users/Japhilko https://github.com/Japhilko
## 5 https://api.github.com/users/Japhilko https://github.com/Japhilko
## 6 https://api.github.com/users/Japhilko https://github.com/Japhilko
## owner.followers_url
## 1 https://api.github.com/users/Japhilko/followers
## 2 https://api.github.com/users/Japhilko/followers
## 3 https://api.github.com/users/Japhilko/followers
## 4 https://api.github.com/users/Japhilko/followers
## 5 https://api.github.com/users/Japhilko/followers
## 6 https://api.github.com/users/Japhilko/followers
## owner.following_url
## 1 https://api.github.com/users/Japhilko/following{/other_user}
## 2 https://api.github.com/users/Japhilko/following{/other_user}
## 3 https://api.github.com/users/Japhilko/following{/other_user}
## 4 https://api.github.com/users/Japhilko/following{/other_user}
## 5 https://api.github.com/users/Japhilko/following{/other_user}
## 6 https://api.github.com/users/Japhilko/following{/other_user}
## owner.gists_url
## 1 https://api.github.com/users/Japhilko/gists{/gist_id}
## 2 https://api.github.com/users/Japhilko/gists{/gist_id}
## 3 https://api.github.com/users/Japhilko/gists{/gist_id}
## 4 https://api.github.com/users/Japhilko/gists{/gist_id}
## 5 https://api.github.com/users/Japhilko/gists{/gist_id}
## 6 https://api.github.com/users/Japhilko/gists{/gist_id}
## owner.starred_url
## 1 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## 2 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## 3 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## 4 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## 5 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## 6 https://api.github.com/users/Japhilko/starred{/owner}{/repo}
## owner.subscriptions_url
## 1 https://api.github.com/users/Japhilko/subscriptions
## 2 https://api.github.com/users/Japhilko/subscriptions
## 3 https://api.github.com/users/Japhilko/subscriptions
## 4 https://api.github.com/users/Japhilko/subscriptions
## 5 https://api.github.com/users/Japhilko/subscriptions
## 6 https://api.github.com/users/Japhilko/subscriptions
## owner.organizations_url
## 1 https://api.github.com/users/Japhilko/orgs
## 2 https://api.github.com/users/Japhilko/orgs
## 3 https://api.github.com/users/Japhilko/orgs
## 4 https://api.github.com/users/Japhilko/orgs
## 5 https://api.github.com/users/Japhilko/orgs
## 6 https://api.github.com/users/Japhilko/orgs
## owner.repos_url
## 1 https://api.github.com/users/Japhilko/repos
## 2 https://api.github.com/users/Japhilko/repos
## 3 https://api.github.com/users/Japhilko/repos
## 4 https://api.github.com/users/Japhilko/repos
## 5 https://api.github.com/users/Japhilko/repos
## 6 https://api.github.com/users/Japhilko/repos
## owner.events_url
## 1 https://api.github.com/users/Japhilko/events{/privacy}
## 2 https://api.github.com/users/Japhilko/events{/privacy}
## 3 https://api.github.com/users/Japhilko/events{/privacy}
## 4 https://api.github.com/users/Japhilko/events{/privacy}
## 5 https://api.github.com/users/Japhilko/events{/privacy}
## 6 https://api.github.com/users/Japhilko/events{/privacy}
## owner.received_events_url owner.type
## 1 https://api.github.com/users/Japhilko/received_events User
## 2 https://api.github.com/users/Japhilko/received_events User
## 3 https://api.github.com/users/Japhilko/received_events User
## 4 https://api.github.com/users/Japhilko/received_events User
## 5 https://api.github.com/users/Japhilko/received_events User
## 6 https://api.github.com/users/Japhilko/received_events User
## owner.site_admin private
## 1 FALSE FALSE
## 2 FALSE FALSE
## 3 FALSE FALSE
## 4 FALSE FALSE
## 5 FALSE FALSE
## 6 FALSE FALSE
## html_url
## 1 https://github.com/Japhilko/2015-01-15-EMBLHeidelberg
## 2 https://github.com/Japhilko/DataAnalysis
## 3 https://github.com/Japhilko/DataGeneration
## 4 https://github.com/Japhilko/DLR_IntroR
## 5 https://github.com/Japhilko/GeoData
## 6 https://github.com/Japhilko/geosmdata
## description fork
## 1 R programming and development (EMBL, Jan 2015) TRUE
## 2 My research on data analysis FALSE
## 3 Rcode for generating synthatic data FALSE
## 4 Unterlagen für DLR Workshop FALSE
## 5 Research on statistics and geodata FALSE
## 6 package to import OpenstreetMap data FALSE
## url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg
## 2 https://api.github.com/repos/Japhilko/DataAnalysis
## 3 https://api.github.com/repos/Japhilko/DataGeneration
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR
## 5 https://api.github.com/repos/Japhilko/GeoData
## 6 https://api.github.com/repos/Japhilko/geosmdata
## forks_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/forks
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/forks
## 3 https://api.github.com/repos/Japhilko/DataGeneration/forks
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/forks
## 5 https://api.github.com/repos/Japhilko/GeoData/forks
## 6 https://api.github.com/repos/Japhilko/geosmdata/forks
## keys_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/keys{/key_id}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/keys{/key_id}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/keys{/key_id}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/keys{/key_id}
## 5 https://api.github.com/repos/Japhilko/GeoData/keys{/key_id}
## 6 https://api.github.com/repos/Japhilko/geosmdata/keys{/key_id}
## collaborators_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/collaborators{/collaborator}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/collaborators{/collaborator}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/collaborators{/collaborator}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/collaborators{/collaborator}
## 5 https://api.github.com/repos/Japhilko/GeoData/collaborators{/collaborator}
## 6 https://api.github.com/repos/Japhilko/geosmdata/collaborators{/collaborator}
## teams_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/teams
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/teams
## 3 https://api.github.com/repos/Japhilko/DataGeneration/teams
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/teams
## 5 https://api.github.com/repos/Japhilko/GeoData/teams
## 6 https://api.github.com/repos/Japhilko/geosmdata/teams
## hooks_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/hooks
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/hooks
## 3 https://api.github.com/repos/Japhilko/DataGeneration/hooks
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/hooks
## 5 https://api.github.com/repos/Japhilko/GeoData/hooks
## 6 https://api.github.com/repos/Japhilko/geosmdata/hooks
## issue_events_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/issues/events{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/issues/events{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/issues/events{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/issues/events{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/issues/events{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/issues/events{/number}
## events_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/events
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/events
## 3 https://api.github.com/repos/Japhilko/DataGeneration/events
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/events
## 5 https://api.github.com/repos/Japhilko/GeoData/events
## 6 https://api.github.com/repos/Japhilko/geosmdata/events
## assignees_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/assignees{/user}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/assignees{/user}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/assignees{/user}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/assignees{/user}
## 5 https://api.github.com/repos/Japhilko/GeoData/assignees{/user}
## 6 https://api.github.com/repos/Japhilko/geosmdata/assignees{/user}
## branches_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/branches{/branch}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/branches{/branch}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/branches{/branch}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/branches{/branch}
## 5 https://api.github.com/repos/Japhilko/GeoData/branches{/branch}
## 6 https://api.github.com/repos/Japhilko/geosmdata/branches{/branch}
## tags_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/tags
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/tags
## 3 https://api.github.com/repos/Japhilko/DataGeneration/tags
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/tags
## 5 https://api.github.com/repos/Japhilko/GeoData/tags
## 6 https://api.github.com/repos/Japhilko/geosmdata/tags
## blobs_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/git/blobs{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/git/blobs{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/git/blobs{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/git/blobs{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/git/blobs{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/git/blobs{/sha}
## git_tags_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/git/tags{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/git/tags{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/git/tags{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/git/tags{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/git/tags{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/git/tags{/sha}
## git_refs_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/git/refs{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/git/refs{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/git/refs{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/git/refs{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/git/refs{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/git/refs{/sha}
## trees_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/git/trees{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/git/trees{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/git/trees{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/git/trees{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/git/trees{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/git/trees{/sha}
## statuses_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/statuses/{sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/statuses/{sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/statuses/{sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/statuses/{sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/statuses/{sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/statuses/{sha}
## languages_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/languages
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/languages
## 3 https://api.github.com/repos/Japhilko/DataGeneration/languages
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/languages
## 5 https://api.github.com/repos/Japhilko/GeoData/languages
## 6 https://api.github.com/repos/Japhilko/geosmdata/languages
## stargazers_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/stargazers
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/stargazers
## 3 https://api.github.com/repos/Japhilko/DataGeneration/stargazers
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/stargazers
## 5 https://api.github.com/repos/Japhilko/GeoData/stargazers
## 6 https://api.github.com/repos/Japhilko/geosmdata/stargazers
## contributors_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/contributors
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/contributors
## 3 https://api.github.com/repos/Japhilko/DataGeneration/contributors
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/contributors
## 5 https://api.github.com/repos/Japhilko/GeoData/contributors
## 6 https://api.github.com/repos/Japhilko/geosmdata/contributors
## subscribers_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/subscribers
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/subscribers
## 3 https://api.github.com/repos/Japhilko/DataGeneration/subscribers
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/subscribers
## 5 https://api.github.com/repos/Japhilko/GeoData/subscribers
## 6 https://api.github.com/repos/Japhilko/geosmdata/subscribers
## subscription_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/subscription
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/subscription
## 3 https://api.github.com/repos/Japhilko/DataGeneration/subscription
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/subscription
## 5 https://api.github.com/repos/Japhilko/GeoData/subscription
## 6 https://api.github.com/repos/Japhilko/geosmdata/subscription
## commits_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/commits{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/commits{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/commits{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/commits{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/commits{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/commits{/sha}
## git_commits_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/git/commits{/sha}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/git/commits{/sha}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/git/commits{/sha}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/git/commits{/sha}
## 5 https://api.github.com/repos/Japhilko/GeoData/git/commits{/sha}
## 6 https://api.github.com/repos/Japhilko/geosmdata/git/commits{/sha}
## comments_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/comments{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/comments{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/comments{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/comments{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/comments{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/comments{/number}
## issue_comment_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/issues/comments{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/issues/comments{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/issues/comments{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/issues/comments{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/issues/comments{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/issues/comments{/number}
## contents_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/contents/{+path}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/contents/{+path}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/contents/{+path}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/contents/{+path}
## 5 https://api.github.com/repos/Japhilko/GeoData/contents/{+path}
## 6 https://api.github.com/repos/Japhilko/geosmdata/contents/{+path}
## compare_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/compare/{base}...{head}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/compare/{base}...{head}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/compare/{base}...{head}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/compare/{base}...{head}
## 5 https://api.github.com/repos/Japhilko/GeoData/compare/{base}...{head}
## 6 https://api.github.com/repos/Japhilko/geosmdata/compare/{base}...{head}
## merges_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/merges
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/merges
## 3 https://api.github.com/repos/Japhilko/DataGeneration/merges
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/merges
## 5 https://api.github.com/repos/Japhilko/GeoData/merges
## 6 https://api.github.com/repos/Japhilko/geosmdata/merges
## archive_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/{archive_format}{/ref}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/{archive_format}{/ref}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/{archive_format}{/ref}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/{archive_format}{/ref}
## 5 https://api.github.com/repos/Japhilko/GeoData/{archive_format}{/ref}
## 6 https://api.github.com/repos/Japhilko/geosmdata/{archive_format}{/ref}
## downloads_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/downloads
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/downloads
## 3 https://api.github.com/repos/Japhilko/DataGeneration/downloads
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/downloads
## 5 https://api.github.com/repos/Japhilko/GeoData/downloads
## 6 https://api.github.com/repos/Japhilko/geosmdata/downloads
## issues_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/issues{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/issues{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/issues{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/issues{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/issues{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/issues{/number}
## pulls_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/pulls{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/pulls{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/pulls{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/pulls{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/pulls{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/pulls{/number}
## milestones_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/milestones{/number}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/milestones{/number}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/milestones{/number}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/milestones{/number}
## 5 https://api.github.com/repos/Japhilko/GeoData/milestones{/number}
## 6 https://api.github.com/repos/Japhilko/geosmdata/milestones{/number}
## notifications_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/notifications{?since,all,participating}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/notifications{?since,all,participating}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/notifications{?since,all,participating}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/notifications{?since,all,participating}
## 5 https://api.github.com/repos/Japhilko/GeoData/notifications{?since,all,participating}
## 6 https://api.github.com/repos/Japhilko/geosmdata/notifications{?since,all,participating}
## labels_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/labels{/name}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/labels{/name}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/labels{/name}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/labels{/name}
## 5 https://api.github.com/repos/Japhilko/GeoData/labels{/name}
## 6 https://api.github.com/repos/Japhilko/geosmdata/labels{/name}
## releases_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/releases{/id}
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/releases{/id}
## 3 https://api.github.com/repos/Japhilko/DataGeneration/releases{/id}
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/releases{/id}
## 5 https://api.github.com/repos/Japhilko/GeoData/releases{/id}
## 6 https://api.github.com/repos/Japhilko/geosmdata/releases{/id}
## deployments_url
## 1 https://api.github.com/repos/Japhilko/2015-01-15-EMBLHeidelberg/deployments
## 2 https://api.github.com/repos/Japhilko/DataAnalysis/deployments
## 3 https://api.github.com/repos/Japhilko/DataGeneration/deployments
## 4 https://api.github.com/repos/Japhilko/DLR_IntroR/deployments
## 5 https://api.github.com/repos/Japhilko/GeoData/deployments
## 6 https://api.github.com/repos/Japhilko/geosmdata/deployments
## created_at updated_at pushed_at
## 1 2015-01-12T15:59:33Z 2015-01-12T15:59:34Z 2015-01-10T22:26:12Z
## 2 2015-07-21T06:00:37Z 2016-02-04T13:01:54Z 2017-04-24T14:20:11Z
## 3 2014-11-11T13:14:01Z 2015-04-21T14:51:01Z 2015-07-27T13:59:39Z
## 4 2014-11-04T10:34:17Z 2016-07-26T08:22:47Z 2016-08-11T13:23:54Z
## 5 2014-06-12T08:51:41Z 2017-03-23T06:00:42Z 2017-03-23T15:31:16Z
## 6 2016-04-08T06:35:45Z 2016-06-06T10:36:01Z 2016-06-08T11:06:58Z
## git_url
## 1 git://github.com/Japhilko/2015-01-15-EMBLHeidelberg.git
## 2 git://github.com/Japhilko/DataAnalysis.git
## 3 git://github.com/Japhilko/DataGeneration.git
## 4 git://github.com/Japhilko/DLR_IntroR.git
## 5 git://github.com/Japhilko/GeoData.git
## 6 git://github.com/Japhilko/geosmdata.git
## ssh_url
## 1 git@github.com:Japhilko/2015-01-15-EMBLHeidelberg.git
## 2 git@github.com:Japhilko/DataAnalysis.git
## 3 git@github.com:Japhilko/DataGeneration.git
## 4 git@github.com:Japhilko/DLR_IntroR.git
## 5 git@github.com:Japhilko/GeoData.git
## 6 git@github.com:Japhilko/geosmdata.git
## clone_url
## 1 https://github.com/Japhilko/2015-01-15-EMBLHeidelberg.git
## 2 https://github.com/Japhilko/DataAnalysis.git
## 3 https://github.com/Japhilko/DataGeneration.git
## 4 https://github.com/Japhilko/DLR_IntroR.git
## 5 https://github.com/Japhilko/GeoData.git
## 6 https://github.com/Japhilko/geosmdata.git
## svn_url homepage size
## 1 https://github.com/Japhilko/2015-01-15-EMBLHeidelberg <NA> 5667
## 2 https://github.com/Japhilko/DataAnalysis <NA> 55636
## 3 https://github.com/Japhilko/DataGeneration <NA> 336
## 4 https://github.com/Japhilko/DLR_IntroR <NA> 32546
## 5 https://github.com/Japhilko/GeoData <NA> 1589706
## 6 https://github.com/Japhilko/geosmdata <NA> 19931
## stargazers_count watchers_count language has_issues has_projects
## 1 0 0 TeX FALSE TRUE
## 2 0 0 HTML TRUE TRUE
## 3 0 0 R TRUE TRUE
## 4 2 2 R TRUE TRUE
## 5 6 6 HTML TRUE TRUE
## 6 0 0 ActionScript TRUE TRUE
## has_downloads has_wiki has_pages forks_count mirror_url
## 1 TRUE TRUE FALSE 0 NA
## 2 TRUE TRUE FALSE 1 NA
## 3 TRUE TRUE TRUE 0 NA
## 4 TRUE TRUE FALSE 0 NA
## 5 TRUE TRUE TRUE 1 NA
## 6 TRUE TRUE FALSE 0 NA
## open_issues_count forks open_issues watchers default_branch
## 1 0 0 0 0 master
## 2 0 1 0 0 master
## 3 0 0 0 0 master
## 4 0 0 0 2 master
## 5 1 1 1 6 master
## 6 0 0 0 0 master
library(jsonlite)
library(DT)

# Formula 1 race results from the Ergast API, shown as an interactive table.
res <- fromJSON("http://ergast.com/api/f1/2004/1/results.json")
drivers <- res$MRData$RaceTable$Races$Results[[1]]$Driver
datatable(drivers)

# New York Times article search.
# The API key is read from the NYT_API_KEY environment variable instead of
# being hard-coded, so the credential is not published with the source.
nyt_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_key)) {
  stop("Set the NYT_API_KEY environment variable first.", call. = FALSE)
}
article_key <- paste0("&api-key=", nyt_key)
url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=obamacare+socialism"
req <- fromJSON(paste0(url, article_key))
articles <- req$response$docs
datatable(articles)

# Slides: "XML Dateien einlesen" / "XML Dateien" (reading XML files)
install.packages("XML")
library(XML)
citation("XML")##
## To cite package 'XML' in publications use:
##
## Duncan Temple Lang and the CRAN Team (2016). XML: Tools for
## Parsing and Generating XML Within R and S-Plus. R package
## version 3.98-1.5. https://CRAN.R-project.org/package=XML
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {XML: Tools for Parsing and Generating XML Within R and S-Plus},
## author = {Duncan Temple Lang and the CRAN Team},
## year = {2016},
## note = {R package version 3.98-1.5},
## url = {https://CRAN.R-project.org/package=XML},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Slide: "XML - Gaston Sanchez"
library("XML")
# Slide: "Gaston Sanchez - Dataflow"
Seine Arbeit sieht man hier.
Gaston Sanchez - Webdaten bekommen
| Function | Description |
|---|---|
| xmlName() | name of the node |
| xmlSize() | number of subnodes |
| xmlAttrs() | named character vector of all attributes |
| xmlGetAttr() | value of a single attribute |
| xmlValue() | contents of a leaf node |
| xmlParent() | name of parent node |
| xmlAncestors() | name of ancestor nodes |
| getSibling() | siblings to the right or to the left |
| xmlNamespace() | the namespace (if there’s one) |
# Slide: "xml2 Paket" (the xml2 package)
install.packages("xml2")
library(xml2)
citation("xml2")##
## To cite package 'xml2' in publications use:
##
## Hadley Wickham and James Hester (2016). xml2: Parse XML. R
## package version 1.0.0. https://CRAN.R-project.org/package=xml2
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {xml2: Parse XML},
## author = {Hadley Wickham and James Hester},
## year = {2016},
## note = {R package version 1.0.0},
## url = {https://CRAN.R-project.org/package=xml2},
## }
# Slide: "Administrative Grenzen Berlin" (administrative boundaries of Berlin)
library(xml2)
# xmlParse(), xmlRoot() and xmlSize() below come from the XML package, not
# from xml2, so XML is attached here as well.
library(XML)

# The URL has to be a single unbroken string: a line break inside the literal
# would become part of the request path.
url <- "http://api.openstreetmap.org/api/0.6/relation/62422"
BE <- xmlParse(url)

# Inspect the root node of the parsed document and count its children.
xmltop <- xmlRoot(BE)
class(xmltop)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
xmlSize(xmltop)
## [1] 1
xmlSize(xmltop[[1]])
## [1] 328
Xpath, the XML Path Language, is a query language for selecting nodes from an XML document.
xpathApply(BE,"//tag[@k = 'source:population']")## [[1]]
## <tag k="source:population" v="http://www.statistik-berlin-brandenburg.de/Publikationen/Stat_Berichte/2010/SB_A1-1_A2-4_q01-10_BE.pdf 2010-10-01"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
Administrative Grenzen für Deutschland
# Slide: "Administrative Grenzen Berlin" (administrative boundaries of Berlin)
url <- "http://api.openstreetmap.org/api/0.6/relation/62422"
BE <- xmlParse(url)
xpathApply(BE,"//tag[@k = 'source:population']")## [[1]]
## <tag k="source:population" v="http://www.statistik-berlin-brandenburg.de/Publikationen/Stat_Berichte/2010/SB_A1-1_A2-4_q01-10_BE.pdf 2010-10-01"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
xpathApply(BE,"//tag[@k = 'name:ta']")## [[1]]
## <tag k="name:ta" v="<U+0BAA><U+0BC6><U+0BB0><U+0BCD><U+0BB2><U+0BBF><U+0BA9><U+0BCD>"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
region <- xpathApply(BE,
"//tag[@k = 'geographical_region']")
# regular expressions
region[[1]]## <tag k="geographical_region" v="Barnim;Berliner Urstromtal;Teltow;Nauener Platte"/>
<tag k="geographical_region"
v="Barnim;Berliner Urstromtal;
Teltow;Nauener Platte"/>
Barnim
url2<-"http://api.openstreetmap.org/api/0.6/node/25113879"
obj2<-xmlParse(url2)
obj_amenity<-xpathApply(obj2,"//tag[@k = 'amenity']")[[1]]
obj_amenity## <tag k="amenity" v="university"/>
xpathApply(obj2,"//tag[@k = 'wikipedia']")[[1]]## <tag k="wikipedia" v="de:Universität Mannheim"/>
# Look up further tags on the same node.
xpathApply(obj2,"//tag[@k = 'wheelchair']")[[1]]
xpathApply(obj2,"//tag[@k = 'name']")[[1]]
# Next example: a different OSM node.
url3<-"http://api.openstreetmap.org/api/0.6/node/303550876"
obj3 <- xmlParse(url3)
xpathApply(obj3,"//tag[@k = 'opening_hours']")[[1]]## <tag k="opening_hours" v="Mo-Sa 09:00-20:00; Su,PH off"/>
url5<-"http://api.openstreetmap.org/api/0.6/way/162149882"
obj5<-xmlParse(url5)
xpathApply(obj5,"//tag[@k = 'name']")[[1]]## <tag k="name" v="City-Airport Mannheim"/>
xpathApply(obj5,"//tag[@k = 'website']")[[1]]## <tag k="website" v="http://www.flugplatz-mannheim.de/"/>
xpathApply(obj5,"//tag[@k = 'iata']")[[1]]## <tag k="iata" v="MHG"/>
url2 <- "http://api.openstreetmap.org/api/0.6/node/2923760808"
RennesBa <- xmlParse(url2)
RennesBa## <?xml version="1.0" encoding="UTF-8"?>
## <osm version="0.6" generator="CGImap 0.6.0 (31539 thorn-01.openstreetmap.org)" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
## <node id="2923760808" visible="true" version="7" changeset="47392918" timestamp="2017-04-02T20:42:05Z" user="FrShaft" uid="2377664" lat="48.1068780" lon="-1.6730415">
## <tag k="addr:city" v="Rennes"/>
## <tag k="addr:country" v="FR"/>
## <tag k="addr:housenumber" v="25"/>
## <tag k="addr:postcode" v="35000"/>
## <tag k="addr:street" v="Avenue Jean Janvier"/>
## <tag k="amenity" v="restaurant"/>
## <tag k="capacity" v="90"/>
## <tag k="name" v="Il Basilico"/>
## <tag k="source:addr:housenumber" v="Rennes Métropole"/>
## <tag k="source:addr:housenumber:ref" v="66075"/>
## <tag k="source:addr:housenumber:version" v="2013-04-02"/>
## <tag k="website" v="http://ilbasilico.fr"/>
## <tag k="wheelchair" v="limited"/>
## <tag k="wheelchair:description" v="Aucune sonnette pour indiquer sa présence mais une rampe d'accès peut être déployée."/>
## </node>
## </osm>
##
# Way 72799743 (tagged as a residential street in the output below).
# Note that this reassigns `url3`, which held a node URL earlier.
url3 <- "http://api.openstreetmap.org/api/0.6/way/72799743"
MadCalle <- xmlParse(url3)
# Show the complete parsed document.
print(MadCalle)
## <?xml version="1.0" encoding="UTF-8"?>
## <osm version="0.6" generator="CGImap 0.6.0 (18266 thorn-02.openstreetmap.org)" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
## <way id="72799743" visible="true" version="5" changeset="11915713" timestamp="2012-06-16T14:49:40Z" user="Montgomery" uid="211405">
## <nd ref="869268876"/>
## <nd ref="1790008568"/>
## <nd ref="864117544"/>
## <nd ref="1790008571"/>
## <nd ref="1790008601"/>
## <nd ref="864117511"/>
## <nd ref="1790008612"/>
## <nd ref="1790008618"/>
## <nd ref="864117819"/>
## <tag k="highway" v="residential"/>
## <tag k="name" v="Calle Alfonso Ercilla"/>
## <tag k="oneway" v="yes"/>
## <tag k="surface" v="asphalt"/>
## </way>
## </osm>
##
Logo Overpass API
The Overpass API is a read-only API that serves up custom selected parts of the OSM map data.
Spielplätze Mannheim
Export Rohdaten
library(XML)
place <- "Mannheim"
type_obj <- "node"
object <- "leisure=playground"
# Build the query URL from `Link1` (defined outside this excerpt), the place
# name, the element type and the key=value filter, then parse the XML response.
InfoList <- xmlParse(paste0(
  Link1, place, "\"];",
  type_obj, "(area)[", object, "];out;"
))
Spielplätze in Mannheim
Die Liste der IDs mit dem Wert playground:
# `id` attribute of every node that has a tag with value 'playground'.
node_id <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ id")
## node_id[[1]]
Erste node id
# `lat` attribute of the same nodes.
lat_x <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ lat")
# lat_x[[1]];lat_x[[2]]
# `lon` attribute of the same nodes. Stored in `lon_x` so that the latitudes
# in `lat_x` are not overwritten by longitudes.
lon_x <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ lon")
Latitude Koordinate
library(devtools)
# Install gosmd from GitHub, then attach it (one statement per line).
install_github("Japhilko/gosmd")
library(gosmd)
## Loading required package: maptools
## Loading required package: sp
## Checking rgeos availability: TRUE
## Loading required package: RJSONIO
##
## Attaching package: 'RJSONIO'
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
## Loading required package: stringr
# Download the nodes tagged leisure=playground for Mannheim via gosmd.
pg_MA <- get_osm_nodes(object = "leisure=playground",
  "Mannheim")
# Extract the entries whose value is "playground"; the result is shown in the
# table below.
info <- extract_osm_nodes(OSM.Data = pg_MA,
  value = "playground")
| | leisure | lat | lon | note |
|---|---|---|---|---|
| 30560755 | playground | 49.51910 | 8.502807 | NA |
| 76468450 | playground | 49.49633 | 8.539396 | Rutsche, Schaukel, großer Sandkasten, Tischtennis |
| 76468534 | playground | 49.49678 | 8.552959 | NA |
| 76468535 | playground | 49.49230 | 8.548750 | NA |
| 76468536 | playground | 49.50243 | 8.548140 | Schaukel, Rutsche, Sandkasten, Spielhäuser, Tischtennis |
| 76468558 | playground | 49.49759 | 8.542036 | NA |
Deborah Nolan - Extracting data from XML
Duncan Temple Lang - A Short Introduction to the XML package for R
Noch mehr Informationen
rvest
install.packages("rvest")
rvest - Easily Harvest (Scrape) Web Pages
library(rvest)
# Download the search result page and collect the href of every <a> inside <h3>.
ht <- read_html('https://www.google.co.in/search?q=guitar+repair+workshop')
links <- ht %>% html_nodes(xpath = '//h3/a') %>% html_attr('href')
# Keep only hrefs containing "url", cut each at the first "&" and strip the
# leading "/url?q=". vapply() guarantees a character vector even for no matches.
gsub(
  "/url\\?q=", "",
  vapply(
    strsplit(grep("url", links, value = TRUE), split = "&"),
    `[`, character(1), 1
  )
)
## [1] "http://theguitarrepairworkshop.com/"
## [2] "http://www.guitarservices.com/"
## [3] "http://www.guitarrepairbench.com/guitar-building-projects/guitar-workshop/guitar-workshop-project.html"
## [4] "https://www.facebook.com/The-Guitar-Repair-Workshop-847517635259712/"
## [5] "https://www.taylorguitars.com/dealer/guitar-repair-workshop-ltd"
## [6] "http://www.laweekly.com/music/10-best-guitar-repair-shops-in-los-angeles-4647166"
## [7] "http://guitarworkshopglasgow.com/pages/repairs-1"
## [8] "https://www.justdial.com/Mumbai/Guitar-Repair-Services/nct-10988623"
## [9] "https://www.justdial.com/Delhi-NCR/Guitar-Repair-Services/nct-10988623"
# Node 25439439: parse the API response and show its station category tag.
url4 <- "http://api.openstreetmap.org/api/0.6/node/25439439"
obj4 <- xmlParse(url4)
print(xpathApply(obj4, "//tag[@k = 'railway:station_category']")[[1]])
## <tag k="railway:station_category" v="2"/>
library(rvest)
# Download the Wikipedia page on station categories.
bhfkat <- read_html(
  "https://de.wikipedia.org/wiki/Bahnhofskategorie")
# Convert the first <table> on the page into a data frame.
df_html_bhfkat <- html_table(
  html_nodes(bhfkat, "table")[[1]], fill = TRUE)
| Stufe | Bahnsteigkanten | Bahnsteiglänge | Reisende/Tag | Zughalte/Tag |
|---|---|---|---|---|
| 6 | 01 | > 000 bis 090 m | 00000 bis 00049 | 000 bis 0010 |
| 5 | 02 | > 090 bis 140 m | 00050 bis 00299 | 011 bis 0050 |
| 4 | 03 bis 04 | > 140 bis 170 m | 00300 bis 00999 | 051 bis 0100 |
| 3 | 05 bis 09 | > 170 bis 210 m | 01000 bis 09999 | 101 bis 0500 |
| 2 | 10 bis 14 | > 210 bis 280 m | 10.000 bis 49.999 | 501 bis 1000 |
| 1 | 00i ab 15 | > 280 m | 00000i ab 50.000 | 000i ab 1001 |
install.packages("tidyverse")
library(tidyverse)
library(stringr)
library(forcats)
library(ggmap)
library(rvest)
# Download the Wikipedia list and convert the second <table> on the page
# into a data frame.
html.world_ports <- read_html("https://en.wikipedia.org/wiki/List_of_busiest_container_ports")
df.world_ports <- html_table(html_nodes(html.world_ports, "table")[[2]], fill = TRUE)
# Compact overview of the columns and their types.
glimpse(df.world_ports)
## Observations: 50
## Variables: 15
## $ Rank <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ Port <chr> "Shanghai", "Singapore", "Shenzhen", "Ningbo-Zhoushan...
## $ Economy <chr> "China", "Singapore", "China", "China", "Hong Kong", ...
## $ 2015[1] <chr> "36,516", "30,922", "24,142", "20,636", "20,073", "19...
## $ 2014[2] <chr> "35,268", "33,869", "23,798", "19,450", "22,374", "18...
## $ 2013[3] <chr> "33,617", "32,240", "23,280", "17,351", "22,352", "17...
## $ 2012[4] <chr> "32,529", "31,649", "22,940", "16,670", "23,117", "17...
## $ 2011[5] <chr> "31,700", "29,937", "22,570", "14,686", "24,384", "16...
## $ 2010[6] <chr> "29,069", "28,431", "22,510", "13,144", "23,532", "14...
## $ 2009[7] <chr> "25,002", "25,866", "18,250", "10,502", "20,983", "11...
## $ 2008[8] <chr> "27,980", "29,918", "21,414", "11,226", "24,248", "13...
## $ 2007[9] <chr> "26,150", "27,932", "21,099", "9,349", "23,881", "13,...
## $ 2006[10] <chr> "21,710", "24,792", "18,469", "7,068", "23,539", "12,...
## $ 2005[11] <chr> "18,084", "23,192", "16,197", "5,208", "22,427", "11,...
## $ 2004[12] <chr> "14,557", "21,329", "13,615", "4,006", "21,984", "11,...
rvest
library(rvest)
# Download the search result page and collect the href of every <a> inside <h3>.
ht <- read_html('https://www.google.co.in/search?q=guitar+repair+workshop')
links <- ht %>% html_nodes(xpath = '//h3/a') %>% html_attr('href')
# Keep only hrefs containing "url", cut each at the first "&" and strip the
# leading "/url?q=". vapply() guarantees a character vector even for no matches.
gsub(
  "/url\\?q=", "",
  vapply(
    strsplit(grep("url", links, value = TRUE), split = "&"),
    `[`, character(1), 1
  )
)
## [1] "http://theguitarrepairworkshop.com/"
## [2] "http://www.guitarservices.com/"
## [3] "http://www.guitarrepairbench.com/guitar-building-projects/guitar-workshop/guitar-workshop-project.html"
## [4] "https://www.facebook.com/The-Guitar-Repair-Workshop-847517635259712/"
## [5] "https://www.taylorguitars.com/dealer/guitar-repair-workshop-ltd"
## [6] "http://www.laweekly.com/music/10-best-guitar-repair-shops-in-los-angeles-4647166"
## [7] "http://guitarworkshopglasgow.com/pages/repairs-1"
## [8] "https://www.justdial.com/Mumbai/Guitar-Repair-Services/nct-10988623"
## [9] "https://www.justdial.com/Delhi-NCR/Guitar-Repair-Services/nct-10988623"
Im Folgenden werde ich zeigen, wie man Textinformationen aus Wikipedia herunterladen, verarbeiten und analysieren kann.
install.packages("NLP")
install.packages("tm")
install.packages("FactoMineR")
stringi von Marek Gagolewski and Bartek Tartanus bietet Möglichkeiten zur String Verarbeitung.
library("stringi")
tm ist ein R-Paket um Text Mining zu realisieren. Es wurde von Ingo Feinerer, Kurt Hornik, und David Meyer geschrieben.
library("tm")
FactoMineR-Paket, das von Sebastien Le, Julie Josse und Francois Husson zur Durchführung der Hauptkomponentenanalyse erstellt wurde.
library("FactoMineR")
# Base URL of the German Wikipedia and the article titles to download.
wiki <- "http://de.wikipedia.org/wiki/"
titles <- c("Zika-Virus", "Influenza-A-Virus_H1N1",
  "Spanische_Grippe", "Influenzavirus",
  "Vogelgrippe_H5N1",
  "Legionellose-Ausbruch_in_Warstein_2013",
  "Legionellose-Ausbruch_in_Jülich_2014")
# One slot per article: each page is read line by line and joined into a
# single string separated by spaces.
articles <- character(length(titles))
for (i in seq_along(titles)) {
  articles[i] <- stri_flatten(
    readLines(stri_paste(wiki, titles[i])), collapse = " ")
}
# Wrap the raw page texts in a tm corpus, one document per article.
docs <- Corpus(VectorSource(articles))
Das Folgende basiert auf einem Blogpost von Norbert Ryciak über die automatische Kategorisierung von Wikipedia-Artikeln.
# Replace everything that looks like an HTML tag with a space.
docs2 <- tm_map(docs, function(x) stri_replace_all_regex(
  x, "<.+?>", " "))
# Replace tab characters with a space.
docs3 <- tm_map(docs2, function(x) stri_replace_all_fixed(
  x, "\t", " "))
docs4 <- tm_map(docs3, PlainTextDocument)
docs5 <- tm_map(docs4, stripWhitespace)
# NOTE(review): stop words are removed before lower-casing, so capitalised
# stop words presumably survive this step; confirm whether that is intended.
docs6 <- tm_map(docs5, removeWords, stopwords("german"))
docs7 <- tm_map(docs6, removePunctuation)
# tolower() is a plain character function; content_transformer() makes it act
# on the document content so the result is still a text document that
# DocumentTermMatrix() accepts.
docs8 <- tm_map(docs7, content_transformer(tolower))
# docs8 <- tm_map(docs8, PlainTextDocument)
# Document-term matrix as a dense matrix: one row per document, one column
# per term.
dtm <- DocumentTermMatrix(docs8)
dtm2 <- as.matrix(dtm)
# Total count of each term over all documents, most frequent first.
frequency <- colSums(dtm2)
frequency <- sort(frequency, decreasing = TRUE)
# Terms that occur more than 20 times in total.
words <- frequency[frequency > 20]
# Term-by-document matrix of the frequent terms: rows are terms, columns are
# documents. One transposed subset replaces growing `s` with cbind() in a
# loop and also works when dtm2 has a single row.
s <- t(dtm2[, colnames(dtm2) %in% names(words), drop = FALSE])
dimnames(s) <- list(rownames(s), titles)
PCA(s)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 125 individuals, described by 7 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Scale every term (row) by its standard deviation across the documents.
# Rows with zero or undefined sd are dropped first, because dividing by them
# would put NaN/Inf into the distance matrix.
row_sd <- apply(s, 1, sd)
varying <- is.finite(row_sd) & row_sd > 0
s0 <- s[varying, , drop = FALSE] / row_sd[varying]
# Cluster the documents (columns of s0). "ward.D" is the current name of the
# method formerly called "ward" in hclust().
h <- hclust(dist(t(s0)), method = "ward.D")
plot(h, labels = titles, sub = "")